Render this report with ~/spinal_cord_paper/scripts/Gg_devel_scWGCNA_module_analysis_render.sh.

library(Seurat)
## Attaching SeuratObject
library(WGCNA)
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
## 
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
## 
##     hclust
## 
## 
## Attaching package: 'WGCNA'
## The following object is masked from 'package:stats':
## 
##     cor
library(tidyr)
library(ggplot2)
library(stringr)
library(patchwork)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ readr   1.4.0      ✔ forcats 0.5.1 
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:patchwork':
## 
##     align_plots
library(pheatmap)
source("~/Neuraltube/scripts/heatmap4.R")

Load individual seurat and test WGCNA data

The individual data sets are the Day 5 (Gg_D05_ctrl_seurat_070323), Day 7 (Gg_D07_ctrl_seurat_070323), and Day 10 (Gg_ctrl_1_seurat_070323) chicken spinal cord sets. The test WGCNA data are the modules calculated on the integrated data set of all three stages.

se_path <- c("Gg_D05_ctrl_seurat_070323",
             "Gg_D07_ctrl_seurat_070323",
             "Gg_ctrl_1_seurat_070323")

clust_col <- read.csv("~/spinal_cord_paper/annotations/broad_cluster_marker_colors.csv") %>% 
  rename(broad = broad_cluster) %>% 
  select(-marker)

Order of the broad clusters for plotting purposes.

broad_order <- c("progenitors",
      "FP",
      "RP",
      "FP/RP",
      "neurons",
      "OPC",
      "MFOL",
      "pericytes",
      "microglia",
      "blood",
      "vasculature"
      )
my.ses <- list()
col_table <- list()
ord_levels <- list()

for (i in seq(se_path)) {
  # load the data sets
  my.se <- readRDS(paste0("~/spinal_cord_paper/data/", se_path[i], ".rds"))
  annot <- read.csv(list.files("~/spinal_cord_paper/annotations",
                               pattern = str_remove(se_path[i], "_seurat_\\d{6}"),
                               full.names = TRUE))
  
  if(length(table(annot$number)) != length(table(my.se$seurat_clusters))) {
     stop("Number of clusters must be identical!")
  }
  
  # rename for left join
  annot <- annot %>% 
    mutate(fine = paste(fine, number, sep = "_")) %>% 
    mutate(number = factor(number, levels = 1:nrow(annot))) %>% 
    rename(seurat_clusters = number) 
  
  # cluster order for vln plots
  ord_levels[[i]] <- annot$fine[order(match(annot$broad, broad_order))]
  
  # create index for color coding
  col_table[[i]] <- annot %>%
    left_join(clust_col, by = "broad") %>% 
    select(c("fine", "color"))
  
  # add cluster annotation to meta data
  my.se@meta.data <- my.se@meta.data %>% 
    rownames_to_column("rowname") %>% 
    left_join(annot, by = "seurat_clusters") %>% 
    mutate(fine = factor(fine, levels = annot$fine)) %>% 
    column_to_rownames("rowname")
  
  my.ses[[i]] <- my.se

}

names(my.ses) <- c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")
names(col_table) <- c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")
names(ord_levels) <- c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")

rm(my.se, annot)
# The reference WGCNA data. We can have several, if we want to test many at the same time
WGCNA_data = list()
WGCNA_data[[1]] = readRDS("~/spinal_cord_paper/output/Gg_devel_int_scWGCNA_250723.rds")
my.wsub =list()
my.wsub[[1]]= c(1:22)

# the name of each sample, as they appear in my.files and in the metadata of the combined object
my.samplenames = c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")

# The subset of clusters in each of the corresponding samples
my.subset=list(c(1:24),
               c(1:26),
               c(1:22))

# This is just to add a little bit more sense to the modules, so that we don't get just a color. Corresponds to WGCNA_data
my.modulenames = list()
my.modulenames[[1]] = c(1:22)
#Subset the seurat objects if needed as defined above
for (i in 1:length(my.ses)) {
  my.ses[[i]] = subset(my.ses[[i]], idents = my.subset[[i]])
}

# Take only genes that are present in the all samples
my.dcs = list()

# We go trhough each WGCNA data
for(i in 1:length(WGCNA_data)) {
  #We start with complete modules
  my.dc = WGCNA_data[[i]]$dynamicCols
  
  #Remove the few genes that are not found in the other datasets
  my.dc = my.dc[which(names(my.dc) %in% rownames(my.ses[[1]]@assays$RNA@data))]
  my.dc = my.dc[which(names(my.dc) %in% rownames(my.ses[[2]]@assays$RNA@data))]
  my.dc = my.dc[which(names(my.dc) %in% rownames(my.ses[[3]]@assays$RNA@data))]
  
  my.dc = my.dc[which(my.dc %in% names(table(WGCNA_data[[i]]$dynamicCols))[my.wsub[[i]]])]
  
  my.dcs[[i]] = my.dc
  
}

Module gene correlation

Here, we do a correlation matrix / heatmap, to see which cell clusters group togheter. This helps us to make more detailed dotplots.
This part of the script can still be used to compare several WGCNA datasets in parallel.

# broad cluster color table
all_col <- do.call(rbind, col_table) %>% 
  rownames_to_column("sample") %>% 
  mutate(sample = substr(sample, 1, 11)) %>% 
  mutate(sample_celltype = paste(sample, fine, sep = "_")) %>% 
  select(c("color", "sample_celltype", "sample"))
# take the normalized data, of each single-cell object.
my.datExpr = list()

# Go trough each seurat object
for (i in 1:length(my.ses)) {
 my.datExpr[[i]] = data.frame(my.ses[[i]]@assays$RNA@data)
}

# Calculate, for every cell, in every sample, the average expression of each of the modules.
my.MEs = list()

# For each set of WGCNA modules
for (i in 1:length(WGCNA_data)) {
  
  # Make a new list
  my.MEs[[i]] = list()
  
  # Populate with data frames from each seurat object
  for (j in 1:length(my.ses)) {
    
    # Data frame of average module expression, per cell.
    my.MEs[[i]][[j]] = moduleEigengenes(t(my.datExpr[[j]][names(my.dcs[[i]]),]), colors = my.dcs[[i]])
  }
}

#Calculate average of expression, per sample and cell cluster
my.modavg = list()

for (i in 1:length(WGCNA_data)) {
  
  my.modavg[[i]] = list()
  
  for (j in 1:length(my.ses)) {
    
    #Make the mean per cell clusters
    my.modavg[[i]][[j]] = aggregate(
      my.MEs[[i]][[j]]$averageExpr,
      list(my.ses[[j]]@meta.data$seurat_clusters),
      mean
      )
    # Give the cell clusters meaningful names
    rownames(my.modavg[[i]][[j]]) = paste0(
      my.samplenames[j],
      "_",
      levels(my.ses[[j]]@meta.data$fine)[my.subset[[j]]]
      )
    }
}



#Gather data to plot

my.wgmat = list()

for (i in 1:length(WGCNA_data)) {
  
  #bind the expression data into one dataframe
  my.wgmat[[i]] = data.table::rbindlist(my.modavg[[i]])
  #Get rid of the extra column
  my.wgmat[[i]] = data.frame(my.wgmat[[i]][,-1])
  #Restore the rownames
  rownames(my.wgmat[[i]]) = unlist(sapply(my.modavg[[i]], rownames))

}

#Get a dataframe with annotations for all the samples and colors we need.
my.metam <- list()

for (i in seq(my.ses)) {
  my.metam[[i]] <- my.ses[[i]][[]]
  rownames(my.metam[[i]]) <- paste0(rownames(my.metam[[i]]), "_", i)
}
my.metam <- do.call(rbind, my.metam)

my.metam$orig.ident <- str_replace_all(my.metam$orig.ident, pattern =  "Gg_ctrl_1", "Gg_D10_ctrl")

# my.metam$sample_celltype = paste0(substr(my.metam$orig.ident,7,9),"_",my.metam$seurat_clusters)
my.metam$sample_celltype = paste0(my.metam$orig.ident, "_", my.metam$fine)

my.metam <- my.metam %>% 
  tibble::rownames_to_column(var = "cell_ID") %>%
  dplyr::left_join(all_col, by = "sample_celltype") %>%
  tibble::column_to_rownames(var = "cell_ID")


# get sample colors
my.colsm = c("grey", "grey30", "black")
names(my.colsm) <- c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")
 
#The colors for the samples and clusters. First the closest to the heatmap. Add a white space to easy the eye and make less confussing
my.heatcols =list()
for (i in 1:length(my.wgmat)) {
  my.heatcols[[i]] = as.matrix(data.frame(
    cluster= as.character(all_col$color[match(rownames(my.wgmat[[i]]), all_col$sample_celltype)]),
    "." = "white",
    sample= as.character(my.colsm[match(all_col$sample, names(my.colsm))]))
    )
}

rm(my.datExpr, my.MEs, my.dcs)

spearman correlation heatmap

annotations

# names and colors for the heatmap annotation
annot_name <- data.frame(
  "Celltypes" = all_col$sample_celltype,
  "Sample"    = all_col$sample,
  row.names = all_col$sample_celltype)
  

pheat_col_table <- do.call(rbind, col_table) %>% 
  rownames_to_column("sample") %>% 
  mutate(sample = substr(sample, 1,11)) %>% 
  mutate(fine = paste(sample, fine, sep = "_"))

# match color table with annotation
pheat_col_table <- pheat_col_table[match(annot_name$Celltypes, pheat_col_table$fine),]

annot_col <- list(
  Celltypes = pheat_col_table$color,
  Sample = c(Gg_D05_ctrl = "#A4A4A4",
             Gg_D07_ctrl = "#515151",
             Gg_D10_ctrl = "#000000")
  )

names(annot_col[[1]]) <- annot_name$Celltypes
heat_col <- colorRampPalette(colors = c("dodgerblue4","dodgerblue", "white", "red", "darkred"))

toplot <- cor(t(my.wgmat[[1]]), method = "spearman")

#Calculate the distance 1-cor, and then calculate dendograms to cluster the clusters
my.hclust = hclust(as.dist(1-cor(t(my.wgmat[[1]]), method = "spearman")))

# lower limit to scale color bar
low_limit <- 100 - abs(round(range(toplot)[1]*100))

pheatmap(toplot,revC = T,
                 main = "spearman correlation of average module eigengene expression by cluster\n Dendrogram based on 'distance 1-cor'",
                 fontsize = 8,  
                 color = heat_col(200)[low_limit:200],
                 show_colnames = F,
                 cluster_rows = my.hclust,
                 cluster_cols = my.hclust,
                 treeheight_row = 0, 
                 annotation_col = annot_name,
                 annotation_colors = annot_col,
                 annotation_legend = F,
                 border_color = NA
                 )

colbar <- c(min(cor(t(my.wgmat[[1]]), method = "spearman")),
            max(cor(t(my.wgmat[[1]]), method = "spearman")))

#Calculate the distance 1-cor, and then calculate dendograms to cluster the clusters
my.hclust = as.dendrogram(hclust(as.dist(1-cor(t(my.wgmat[[1]]), method = "spearman"))))

#Plot!
pdf("~/spinal_cord_paper/figures/Fig_1_module_gene_heatmap_spearman.pdf", width = 11, height = 11)
heatmap.4(cor(t(my.wgmat[[1]]), method = "spearman"),
          col = colorRampPalette(c("dodgerblue4","dodgerblue", "white", "red", "darkred"))(n = 1000),
          trace="none",
          lhei=c(0.2,1.6),
          lwid=c(0.2,1.6),
          scale = "none",margins = c(10,10),
          cexCol = 0.75,
          revC = TRUE,
          ColSideColors = my.heatcols[[1]],
          RowSideColors = t(my.heatcols[[1]][,3:1]),Rowv = my.hclust, Colv = my.hclust)


pdf("~/spinal_cord_paper/figures/Fig_1_module_gene_heatmap_color_key_spearman.pdf", width = 10, height = 10)
# heatmap since color key is missing in first heatmap
heatmap.4(rbind(colbar, colbar),
          col = colorRampPalette(c("dodgerblue4","dodgerblue", "white", "red", "darkred"))(n = 1000),
          trace="none",
          lhei=c(0.2,1.6),
          lwid=c(0.2,1.6),
          scale = "none",margins = c(10,10),
          cexCol = 0.75)

heatmap of module pseudobulk average expression

# module colors
my.colcols = list()

for (i in 1:length(my.wgmat)) {
    my.colcols[[i]] = as.matrix(names(table(WGCNA_data[[i]]$dynamicCols)))
}

avg.mod.eigengenes <- WGCNA_data[[1]]$sc.MEList$averageExpr
rm(WGCNA_data)

htmp <- heatmap.4(as.matrix(my.wgmat[[1]]),
          col = colorRampPalette(c("dodgerblue4","dodgerblue", "white", "red", "darkred"))(n = 1000),
          trace="none",
          scale = "row",
          margins = c(10,10),
          ColSideColors = my.colcols[[1]], 
          RowSideColors = t(my.heatcols[[1]]))

module_order <- htmp[["colInd"]]

pdf("~/spinal_cord_paper/figures/Devel_module_v_clusters_heatmap.pdf", width = 7, height = 10)
heatmap.4(as.matrix(my.wgmat[[1]]),
          col = colorRampPalette(c("dodgerblue4","dodgerblue", "white", "red", "darkred"))(n = 1000),
          trace="none",
          scale = "row",
          margins = c(10,10),
          ColSideColors = my.colcols[[1]], 
          RowSideColors = t(my.heatcols[[1]]))

Load the integrated data set

The integrated data set on which the WGCNA is calculated. We plot it, split by the original cell types from the three original samples.

# This is a file of all the combined mouse datasets, normalized and such.
my.sec = readRDS("~/spinal_cord_paper/data/Gg_devel_int_seurat_250723.rds")

identical(rownames(my.metam), colnames(my.sec))
## [1] TRUE
#Which WGCNA dataset are we using?
my.W = 1

#Choose how many groups of cell clusters we want to use. Based on the distance clustering from above
my.clcl = cutree(hclust(as.dist(1-cor(t(my.wgmat[[my.W]])))), k = 5)

my.metam$sample_celltype <- factor(my.metam$sample_celltype, levels = names(my.clcl))
#Set the identities of the integrated data, to the annotated clusters
my.sec = SetIdent(my.sec, value = my.metam$sample_celltype)

p1 <- DimPlot(
  my.sec,
  reduction = "tsne",
  label = TRUE,
  repel = TRUE,
  cols = my.heatcols[[1]][,"cluster"],
  split.by = "orig.ident"
  ) + 
  NoLegend()

p1

pdf("~/spinal_cord_paper/figures/Devel_split_tsne.pdf", height = 7, width = 15)
#Plot split tsne
p1

Avg. module exp. by stage

tSNE DimPlots showing the average expression of each module by stage.

for (i in seq(my.ses)) {
  # prepare average expression table
  tmp <- avg.mod.eigengenes %>%
    tibble::rownames_to_column("cell_ID") %>%
    dplyr::filter(grepl(paste0("_", i, "$"), cell_ID)) %>%
    dplyr::mutate(cell_ID = stringr::str_remove_all(cell_ID, paste0("_", i))) %>%
    tibble::column_to_rownames("cell_ID")
  
  identical(rownames(tmp), colnames(my.ses[[i]]))
  # add meta data to the seurat objects
  my.ses[[i]] <- AddMetaData(my.ses[[i]], tmp)
}

#max and min expression per module (column max)
mod_max <- apply(avg.mod.eigengenes, MARGIN = 2, FUN = max)[module_order]
mod_min <- apply(avg.mod.eigengenes, MARGIN = 2, FUN = min)[module_order]

modplots <- list()
modplots[[1]] <- list()
modplots[[2]] <- list()
modplots[[3]] <- list()

modules_in_order <- colnames(tmp)[module_order]

# plot the modules split to the stages
for (i in seq(my.ses)) {
  for (j in seq(ncol(tmp))) {
  
    modplots[[i]][[j]]  <- FeaturePlot(
      my.ses[[i]],
      features = modules_in_order[j],
      reduction = "tsne",
      cols = c("grey90", substring(modules_in_order[j], 3))
      ) +
      ggtitle(stringr::str_remove(modules_in_order[j],"^AE")) +
      scale_color_gradient(low="ivory2", high=substring(modules_in_order[j], 3), #colors in the scale
                 # breaks=seq(mod_min[j], mod_max[j], 0.1), #breaks in the scale bar
                 limits=c(mod_min[j], mod_max[j])) #same limits for plots

    
    }
}
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
tmp <- c(modplots[[1]], modplots[[2]], modplots[[3]])

gridExtra::grid.arrange(grobs = tmp, ncol = 3, as.table = FALSE)

pdf("~/spinal_cord_paper/figures/Devel_modules_AE_plots.pdf", width = 12, height = 70)
gridExtra::grid.arrange(grobs = tmp, ncol = 3, as.table = FALSE)

pdf("~/spinal_cord_paper/figures/Fig_1_MN_mod_tsne.pdf", height = 6, width = 12)
modplots[[1]][[22]] + modplots[[1]][[6]] |
  modplots[[2]][[22]] + modplots[[2]][[6]] |
  modplots[[3]][[22]] + modplots[[3]][[6]]

AE over time

We plot the average expression of each module in the three stages and the 5 broad cell type clusters present in all 3 stages.

# module annotations
mod_annot <- read.csv("~/spinal_cord_paper/annotations/Gg_devel_int_scWGCNA_module_annotation.csv") %>%
  dplyr::mutate(module = str_replace_all(module, "\\d{1,2}\\_", "AE"))

meta <- list()

for (i in seq(my.ses)) {
  meta[[i]] <- my.ses[[i]]@meta.data %>%
    tibble::rownames_to_column("cell_ID") %>%
    dplyr::mutate(cell_ID = paste0(cell_ID, "_", i)) %>%
    dplyr::select(c("cell_ID", "broad"))
}

meta <- do.call(rbind, meta)

# mean average expression by stage
mean_AE <- avg.mod.eigengenes %>%
  tibble::rownames_to_column("cell_ID") %>%
  dplyr::mutate(stage = stringr::str_sub(cell_ID, -1)) %>%
  dplyr::mutate(stage = factor(stage, levels = c(1:3), labels = c("D05", "D07", "D10"))) %>%
  dplyr::left_join(meta, by = "cell_ID") %>%
  tibble::column_to_rownames("cell_ID") %>%
  tidyr::unite("stage_cl", stage, broad, sep = "_") %>%
  dplyr::group_by(stage_cl) %>%
  dplyr::summarise_each(mean) %>%
  dplyr::ungroup() %>%
  gather(key="module", value = "AE", -stage_cl) %>%
  dplyr::left_join(mod_annot[, c(1,3)], by = "module") %>%
  tidyr::separate("stage_cl", c("stage", "broad"), sep = "_", remove = FALSE) %>%
  dplyr::filter(broad %in% c("progenitors", "neurons", "RP", "FP", "pericytes"))
## Warning: `summarise_each_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `across()` instead.
## ℹ The deprecated feature was likely used in the dplyr package.
##   Please report the issue at <https://github.com/tidyverse/dplyr/issues>.
labels_dotplot <- stringr::str_remove(modules_in_order, "^AE")
names(labels_dotplot) <- modules_in_order

mean_mod <- ggplot(data = mean_AE,
  aes(
    x = stage,
    y = AE,
    color = factor(broad, levels = c("progenitors", "neurons", "RP", "FP", "pericytes")),
    group = broad,
    label = annotation
    )
  ) +
  geom_line() +
  geom_point() +
  scale_color_manual(values = c("#edc919", "#cd2b91", "#99ca3c", "#853335", "#f26a2d")) +
  theme_bw() +
  # facet wrap with reordered factors
  facet_wrap(vars(factor(module, levels = unique(mean_AE$module)[module_order])),
             scales = "free_y",
             nrow = 4,
             ncol = 6) +
  labs(color = "broad") +
  ggtitle("Average module expression by stage")

plotly::ggplotly(mean_mod)
pdf("~/spinal_cord_paper/figures/Fig_1_mean_mod_AE.pdf", height = 7, width = 12)
#Plot split tsne
mean_mod

VlnPlots of avg. module exp. by stage and seurat cluster

colored by module

# reorder seurat clusters
for (i in seq(my.ses)) {
  my.ses[[i]]$seurat_clusters <- factor(
  my.ses[[i]]$seurat_clusters,
  levels = levels(my.ses[[i]]$seurat_clusters)[as.integer(str_extract(ord_levels[[i]], "\\d{1,2}$"))]
  )

}

vplots <- list()

for (i in seq(my.ses)) {
  
  mods <- colnames(my.ses[[i]][[]])[grep("^AE",colnames(my.ses[[i]][[]]))]
  
  vplots[[i]] <- VlnPlot(
        my.ses[[i]],
        features = mods[module_order],
        group.by = "seurat_clusters",
        stack = TRUE, flip = TRUE,
        cols = substring(mods, 3)[module_order]) +
    theme(legend.position = "none") +
    geom_hline(yintercept = 0, lty = "dashed")
  
}

vplots[[1]]

vplots[[2]]

vplots[[3]]

pdf("~/spinal_cord_paper/figures/Fig_1_AE_by_cluster_modcol.pdf", height = 20, width = 10)
vplots[[1]]
vplots[[2]]
vplots[[3]]

colored by cell type

clust_col <- read.csv("~/spinal_cord_paper/annotations/broad_cluster_marker_colors.csv")

vplots_id <- list()

for (i in seq(my.ses)) {
    
  mods <- colnames(my.ses[[i]][[]])[grep("^AE",colnames(my.ses[[i]][[]]))]
  
  vplots_id[[i]] <- VlnPlot(
    my.ses[[i]],
    features = mods[module_order],
    group.by = "seurat_clusters",
    stack = TRUE, flip = TRUE,
    fill.by = "ident",
    cols = col_table[[i]]$color[as.integer(str_extract(ord_levels[[i]], "\\d{1,2}$"))]) +
    theme(legend.position = "none") +
    geom_hline(yintercept = 0, lty = "dashed")
}

vplots_id[[1]]

vplots_id[[2]]

vplots_id[[3]]

pdf("~/spinal_cord_paper/figures/Fig_1_AE_by_cluster_clucol.pdf", height = 20, width = 10)
vplots_id[[1]]
vplots_id[[2]]
vplots_id[[3]]

MN modules

For the figures we specifically select the two MN modules and plot them as Vln plots.

tmp <- avg.mod.eigengenes 
  
identical(rownames(tmp), colnames(my.sec))
## [1] TRUE
# add meta data to the int seurat object
my.sec <- AddMetaData(my.sec, tmp)
my.sec <- AddMetaData(my.sec, my.metam[c("fine", "sample_celltype")])

custom_order <- c(paste(names(ord_levels)[1], ord_levels[[1]], sep = '_'),
                  paste(names(ord_levels)[2], ord_levels[[2]], sep = '_'),
                  paste(names(ord_levels)[3], ord_levels[[3]], sep = '_'))

my.sec$sample_celltype <- factor(
  my.sec$sample_celltype,
  levels = custom_order
  )

vln_ind <- VlnPlot(my.sec,
                   features = c("AEdarkred", "AEgreen"),
                   group.by = "sample_celltype",
                   stack = TRUE,
                   flip = TRUE,
                   cols = c("darkred", "green"),pt.size = 1
                  ) +
    NoLegend()

vln_ind

pdf("~/spinal_cord_paper/figures/Fig_1_AE_MNs.pdf", height = 7, width = 20)
vln_ind
VlnPlot(
    my.sec,
    features = mods[module_order],
    group.by = "sample_celltype",
    stack = TRUE, flip = TRUE,
    cols = substring(mods, 3)[module_order]) +
    theme(legend.position = "none") +
    geom_hline(yintercept = 0, lty = "dashed")

pdf("~/spinal_cord_paper/figures/Fig_1_AE_by_cluster_integrated_data.pdf", height = 20, width = 30)
VlnPlot(
    my.sec,
    features = mods[module_order],
    group.by = "sample_celltype",
    stack = TRUE, flip = TRUE,
    cols = substring(mods, 3)[module_order]) +
    theme(legend.position = "none") +
    geom_hline(yintercept = 0, lty = "dashed")

avg. module exp. by stage

v1 <- VlnPlot(my.sec,
        features = mods[module_order], 
        group.by = "orig.ident")
v1

pdf("~/spinal_cord_paper/figures/Fig_1_AE_by_stage.pdf", height = 20, width = 20)
v1
# Date and time of Rendering
Sys.time()
## [1] "2023-08-09 08:54:58 CEST"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /scicore/soft/apps/OpenBLAS/0.3.1-GCC-7.3.0-2.30/lib/libopenblas_sandybridgep-r0.3.1.so
## 
## locale:
## [1] en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] pheatmap_1.0.12       cowplot_1.1.1         forcats_0.5.1        
##  [4] dplyr_1.0.10          purrr_0.3.4           readr_1.4.0          
##  [7] tibble_3.1.8          tidyverse_1.3.1       patchwork_1.1.1      
## [10] stringr_1.4.0         ggplot2_3.3.3         tidyr_1.1.3          
## [13] WGCNA_1.70-3          fastcluster_1.2.3     dynamicTreeCut_1.63-1
## [16] SeuratObject_4.0.2    Seurat_4.0.5         
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1             reticulate_1.22        tidyselect_1.2.0      
##   [4] RSQLite_2.2.7          AnnotationDbi_1.54.0   htmlwidgets_1.5.3     
##   [7] grid_4.1.0             Rtsne_0.15             munsell_0.5.0         
##  [10] codetools_0.2-18       ica_1.0-2              preprocessCore_1.54.0 
##  [13] future_1.30.0          miniUI_0.1.1.1         withr_2.4.2           
##  [16] colorspace_2.0-1       Biobase_2.52.0         highr_0.9             
##  [19] knitr_1.41             rstudioapi_0.13        stats4_4.1.0          
##  [22] ROCR_1.0-11            tensor_1.5             listenv_0.8.0         
##  [25] labeling_0.4.2         GenomeInfoDbData_1.2.6 polyclip_1.10-0       
##  [28] farver_2.1.0           bit64_4.0.5            parallelly_1.33.0     
##  [31] vctrs_0.5.1            generics_0.1.3         xfun_0.34             
##  [34] R6_2.5.0               doParallel_1.0.16      GenomeInfoDb_1.28.0   
##  [37] bitops_1.0-7           spatstat.utils_3.0-1   cachem_1.0.5          
##  [40] assertthat_0.2.1       promises_1.2.0.1       scales_1.1.1          
##  [43] nnet_7.3-16            gtable_0.3.0           globals_0.16.2        
##  [46] goftest_1.2-2          rlang_1.0.6            splines_4.1.0         
##  [49] lazyeval_0.2.2         impute_1.66.0          spatstat.geom_3.0-3   
##  [52] broom_0.7.6            checkmate_2.0.0        yaml_2.2.1            
##  [55] reshape2_1.4.4         abind_1.4-5            modelr_0.1.8          
##  [58] crosstalk_1.1.1        backports_1.2.1        httpuv_1.6.1          
##  [61] Hmisc_4.5-0            tools_4.1.0            ellipsis_0.3.2        
##  [64] spatstat.core_2.1-2    jquerylib_0.1.4        RColorBrewer_1.1-2    
##  [67] BiocGenerics_0.38.0    ggridges_0.5.3         Rcpp_1.0.7            
##  [70] plyr_1.8.6             base64enc_0.1-3        zlibbioc_1.38.0       
##  [73] RCurl_1.98-1.3         rpart_4.1-15           deldir_1.0-6          
##  [76] pbapply_1.4-3          S4Vectors_0.30.0       zoo_1.8-9             
##  [79] haven_2.4.1            ggrepel_0.9.1          cluster_2.1.2         
##  [82] fs_1.5.0               magrittr_2.0.1         data.table_1.14.0     
##  [85] scattermore_0.7        lmtest_0.9-38          reprex_2.0.1          
##  [88] RANN_2.6.1             fitdistrplus_1.1-6     matrixStats_0.58.0    
##  [91] hms_1.1.0              mime_0.10              evaluate_0.20         
##  [94] xtable_1.8-4           jpeg_0.1-8.1           readxl_1.3.1          
##  [97] IRanges_2.26.0         gridExtra_2.3          compiler_4.1.0        
## [100] KernSmooth_2.23-20     crayon_1.4.1           htmltools_0.5.1.1     
## [103] mgcv_1.8-35            later_1.2.0            Formula_1.2-4         
## [106] lubridate_1.7.10       DBI_1.1.1              dbplyr_2.1.1          
## [109] MASS_7.3-54            Matrix_1.3-3           cli_3.4.1             
## [112] parallel_4.1.0         igraph_1.2.6           pkgconfig_2.0.3       
## [115] foreign_0.8-81         sp_1.4-5               plotly_4.10.0         
## [118] spatstat.sparse_3.0-0  xml2_1.3.2             foreach_1.5.1         
## [121] bslib_0.2.5.1          XVector_0.32.0         rvest_1.0.2           
## [124] digest_0.6.27          sctransform_0.3.3      RcppAnnoy_0.0.19      
## [127] spatstat.data_3.0-0    Biostrings_2.60.0      rmarkdown_2.17        
## [130] cellranger_1.1.0       leiden_0.3.9           htmlTable_2.2.1       
## [133] uwot_0.1.10            shiny_1.6.0            lifecycle_1.0.3       
## [136] nlme_3.1-152           jsonlite_1.7.2         viridisLite_0.4.0     
## [139] fansi_0.5.0            pillar_1.8.1           lattice_0.20-44       
## [142] KEGGREST_1.32.0        fastmap_1.1.0          httr_1.4.2            
## [145] survival_3.2-11        GO.db_3.13.0           glue_1.6.2            
## [148] png_0.1-7              iterators_1.0.13       bit_4.0.4             
## [151] stringi_1.6.2          sass_0.4.0             blob_1.2.1            
## [154] latticeExtra_0.6-29    memoise_2.0.0          irlba_2.3.3           
## [157] future.apply_1.7.0